* Author: Joe Tatarka
* Name: 2_1_aggregates_build.do
* Purpose: Create aggregate dataset to create figures for the paper. Bring in Census and BLS aggregate sales and employees and CPI deflators. Also bring in aggregates from our SafeGraph sample. Creates aggregates_build.dta

* Set Global File Paths
global root= "T:/service_industries/replication_package"

global raw_root = "${root}/datasets/raw"
global intermediate_root = "${root}/datasets/intermediate"
global built_root = "${root}/datasets/built"
global exhibits_root = "${root}/exhibits"

* set more off so 
set more off 

***********************************************************************
***** 1. Bring in Inflation Indexes
***************************************************************

************************************
*** Bring in inflation deflators 
************************************

**** Limited-Service Deflators (NSA)

tempfile cpi_limited
import excel "${raw_root}/cpi/limited_cpi_nsa_feb_2026.xlsx", clear firstrow 

rename *, lower
drop seriesid 
rename value cpi_limited

* Destring cpi variable 
replace cpi_limited = "" if cpi_limited == "-"
destring cpi_limited, replace

drop if substr(period, 1,1) == "S"

replace period = substr(period,2,2)
destring period, replace 

gen year_month_gs = ym(year, period)
format year_month_gs %tm

gen base = cpi_limited if year_month_gs == monthly("Jan2019", "MY")
egen base_jan_2019 = max(base)

replace cpi_limited = 100*(cpi_limited/base_jan_2019)

keep cpi_limited year_month_gs

save `cpi_limited', replace

*************************************
*** Food Away from Home deflator (NSA)
***************************************

tempfile cpi_food_nsa 

import excel "${raw_root}/cpi/food_away_cpi_nsa_feb_2026.xlsx", clear firstrow

rename *, lower
drop seriesid 
rename value cpi_food

* Destring cpi variable 
replace cpi_food = "" if cpi_food == "-"
destring cpi_food, replace

drop if substr(period, 1,1) == "S"

replace period = substr(period,2,2)
destring period, replace 

gen year_month_gs = ym(year, period)
format year_month_gs %tm

gen double base = cpi_food if year_month_gs == monthly("Jan2019", "MY")
egen double base_jan_2019 = max(base)

replace cpi_food = 100*(cpi_food/base_jan_2019)

keep cpi_food year_month_gs
rename cpi_food cpi_food_nsa

save `cpi_food_nsa', replace


***************************************
*** Food Away from Home deflator (SA)
***************************************

tempfile cpi_food_sa 

import excel "${raw_root}/cpi/food_away_cpi_sa_feb_2026.xlsx", clear firstrow

rename *, lower
drop seriesid 
rename value cpi_food

* Destring cpi variable 
replace cpi_food = "" if cpi_food == "-"
destring cpi_food, replace

drop if substr(period, 1,1) == "S"

replace period = substr(period,2,2)
destring period, replace 

gen year_month_gs = ym(year, period)
format year_month_gs %tm

gen double base = cpi_food if year_month_gs == monthly("Jan2019", "MY")
egen double base_jan_2019 = max(base)

replace cpi_food = 100*(cpi_food/base_jan_2019)

keep cpi_food year_month_gs
rename cpi_food cpi_food_sa

save `cpi_food_sa', replace

*** combine deflators
tempfile deflators
merge 1:1 year_month_gs using `cpi_limited', nogen
merge 1:1 year_month_gs using `cpi_food_nsa', nogen
save `deflators', replace

***********************************************
**** 2. Prepare Safegraph Aggregates
***********************************************

* We want monthly aggregates of spend, emp, and visits for our main_build sample.

use placekey year_month_gs spend nominal_spend visits emp using "${built_root}/main_build.dta", clear
rename spend spend_sg
rename visits visits_sg 
rename emp emp_sg
rename nominal_spend nominal_spend_sg
tempfile sg
collapse (sum) spend_sg visits_sg emp_sg nominal_spend_sg, by(year_month_gs)

save `sg', replace

********************************************************************************
******* 3. Bring in Census Monthly Retail Sales data
********************************************************************************
******* Census monthly sales 

forval yr = 1992/2024 {
	
	tempfile census_`yr'
	import excel "${raw_root}/census_monthly_retail_trade/mrtssales92-present_feb_2026.xlsx", clear firstrow sheet("`yr'") cellrange(B5:O109) allstring
	
	replace B = "food_nsa" if _n == 61
	replace B = "limited" if _n == 65
	replace B = "food_sa" if _n == 104
	keep if _n == 61 | _n == 65 | _n == 104
	drop TOTAL
	
	foreach var of varlist _all {
		rename `var' sales_prelim`var'
	}

	rename sales_prelimB rest_type

	* Destring
	foreach v of varlist sales_prelimJan`yr'-sales_prelimDec`yr' {
		replace `v' = "" if `v' == "(S)" | `v' == "NA" | `v' == "N/A"
		replace `v' = ustrregexra(`v', "[,[:space:]]", "")
		replace `v' = ustrregexra(`v', "[^0-9\.\-]", "")
		destring `v', replace
	}

	reshape long sales_prelim, i(rest_type) j(year_month_prelim) string


	destring sales_prelim, replace


	gen year_month_gs = monthly(year_month_prelim, "MY")
	format year_month_gs %tm
	drop year_month_prelim
	reshape wide sales_prelim, i(year_month_gs) j(rest_type) string
	reshape long
	replace sales_prelim = (sales_prelim*1000000)
	format sales_prelim %15.0g
	rename sales_prelim spend_

	reshape wide spend_, i(year_month_gs) j(rest_type) string
	
	save `census_`yr'', replace

}


*** 2025 still preliminary data 
tempfile census_2025

import excel "${raw_root}/census_monthly_retail_trade/mrtssales92-present_feb_2026.xlsx", clear firstrow sheet("2025") cellrange(B5:O109) allstring
	
	replace B = "food_nsa" if _n == 61
	replace B = "limited" if _n == 65
	replace B = "food_sa" if _n == 104
	keep if _n == 61 | _n == 65 | _n == 104
	drop CYCUM PYCUM 

capture confirm variable Nov2025p
if !_rc rename Nov2025p Nov2025
	
foreach var of varlist _all {
	rename `var' sales_prelim`var'
}
	
rename sales_prelimB rest_type

* Destring
foreach v of varlist sales_prelimJan2025-sales_prelimNov2025 {
    replace `v' = "" if `v' == "(S)" | `v' == "NA" | `v' == "N/A"
    replace `v' = ustrregexra(`v', "[,[:space:]]", "")
    replace `v' = ustrregexra(`v', "[^0-9\.\-]", "")
    destring `v', replace
}

reshape long sales_prelim, i(rest_type) j(year_month_prelim) string

destring sales_prelim, replace
gen year_month_gs = monthly(year_month_prelim, "MY")
format year_month_gs %tm
drop year_month_prelim
reshape wide sales_prelim, i(year_month_gs) j(rest_type) string
reshape long
replace sales_prelim = (sales_prelim*1000000)
format sales_prelim %15.0g
rename sales_prelim spend_

reshape wide spend_, i(year_month_gs) j(rest_type) string
	
save `census_2025', replace

tempfile census

forval yr = 1992/2024 {
	append using `census_`yr''
}

sort year_month_gs 
save `census', replace

********************************************************************
***** 4. Bring in Employee numbers from BLS 
********************************************************************

************* Get BLS total # of employees for restaurants (Naics 7225)
tempfile bls_7225
import excel "${raw_root}/bls_emp/restaurants_emp_nsa_feb_2026.xlsx", clear firstrow

keep if SeriesID == "CEU7072250001"

replace Period = substr(Period,2,2)
destring Period, replace
gen year_month_gs = ym(Year, Period)
format year_month_gs %tm
rename Value emp_rest 
replace emp_rest = emp_rest*1000
keep year_month_gs emp_rest

save `bls_7225', replace

****************** BLS aggregate hours of emp for naics code 722511 (Full-Service Restaurants)
tempfile bls_722511
import excel "${raw_root}/bls_emp/full_service_emp_nsa_feb_2026.xlsx", clear firstrow

keep if SeriesID == "CEU7072251101"

replace Period = substr(Period,2,2)
destring Period, replace
gen year_month_gs = ym(Year, Period)
format year_month_gs %tm
rename Value emp_full 
replace emp_full = emp_full*1000
keep year_month_gs emp_full
save `bls_722511', replace


****************** BLS aggregate hours of emp for Food and Drink Places (NAICS 722) SEASONALLY ADJUSTED
tempfile bls_722_sa
import excel "${raw_root}/bls_emp/food_service_emp_sa_feb_2026.xlsx", clear firstrow

keep if SeriesID == "CES7072200001"

replace Period = substr(Period,2,2)
destring Period, replace
gen year_month_gs = ym(Year, Period)
format year_month_gs %tm
rename Value emp_food_sa 
replace emp_food_sa = emp_food_sa*1000
keep year_month_gs emp_food_sa
save `bls_722_sa', replace

****************** BLS aggregate hours of emp for Food and Drink Places (NAICS 722) NOT SEASONALLY ADJUSTED
tempfile bls_722_nsa
import excel "${raw_root}/bls_emp/food_service_emp_nsa_feb_2026.xlsx", clear firstrow

keep if SeriesID == "CEU7072200001"

replace Period = substr(Period,2,2)
destring Period, replace
gen year_month_gs = ym(Year, Period)
format year_month_gs %tm
rename Value emp_food_nsa 
replace emp_food_nsa = emp_food_nsa*1000
keep year_month_gs emp_food_nsa
save `bls_722_nsa', replace

*** merge in emp counts together
merge 1:1 year_month_gs using `bls_7225', nogen 
merge 1:1 year_month_gs using `bls_722511', nogen 
merge 1:1 year_month_gs using `bls_722_sa', nogen

tempfile bls_emp
** Create employees for limited service eating places by subtracting the full service employees from all restaurant (naics 7225) employees
gen emp_limited = emp_rest - emp_full

keep year_month_gs emp_limited emp_food_sa emp_food_nsa emp_rest emp_full
save `bls_emp', replace

********************************************************************************
****** 6. Merge Everything Together
********************************************************************************
use `census', clear

merge 1:1 year_month_gs using `bls_emp', nogen
merge 1:1 year_month_gs using `sg', nogen 
merge 1:1 year_month_gs using `deflators', nogen
keep if year_month_gs >= monthly("jan1992", "MY")

********************************************************************************
********** 7. Cleaning and Processing
********************************************************************************
sort year_month_gs
drop in L // January 2026 not used

* Step 1 - Deflate Census Spend (SG Spend is already deflated)
foreach var of varlist spend_limited spend_food_sa spend_food_nsa {
	gen nominal_`var' = `var'
} 
replace spend_limited = (100/cpi_limited)*spend_limited 
*replace spend_food_sa = (100/cpi_food_sa)*spend_food_sa // adjusted slightly differently for Fig 1
replace spend_food_nsa = (100/cpi_food_nsa)*spend_food_nsa 
gen spend_sg_food_cpi = (100/cpi_food_nsa)*nominal_spend_sg
gen spend_limited_food_cpi = (100/cpi_food_nsa)*nominal_spend_limited

* Step 2a - gen real sales per emp. Matching Excel.
gen adj_emp_food_sa = emp_food_sa / 1000
gen rel_price_food_sa = cpi_food_sa / cpi_food_sa[_N]
gen real_sales = (spend_food_sa / 1000000) / rel_price_food_sa
gen ann_real_sales_pr_emp_food_sa = 12*1000*real_sales/(emp_food_sa / 1000)

* Step 2b - gen spend prod and visits prod
gen prod_sg = spend_sg/emp_sg 
gen visits_prod_sg = visits_sg/emp_sg 
gen prod_food_sa = spend_food_sa/emp_food_sa
gen prod_food_nsa = spend_food_nsa/emp_food_nsa 
gen prod_sg_food_cpi = spend_sg_food_cpi/emp_sg 
gen prod_limited_food_cpi = spend_limited_food_cpi/emp_limited
gen prod_limited = spend_limited/emp_limited 

******************
** Index to 1992 Avg 
sort year_month_gs
foreach var of varlist prod_food_sa prod_food_nsa prod_limited_food_cpi {
	egen `var'_1992 = mean(`var') if year_month_gs <= monthly("dec1992", "MY")
	gen `var'_idx1992 = 100*(`var'/`var'_1992[1])
	drop `var'_1992
}

*****************
*** Index to 2019 Avg 
sort year_month_gs
foreach var of varlist prod_limited prod_limited_food_cpi prod_sg prod_sg_food_cpi visits_prod_sg spend_limited emp_limited spend_sg visits_sg emp_sg {
	egen `var'_2019 = mean(`var') if year_month_gs <= monthly("dec2019", "MY") & year_month_gs >= monthly("jan2019", "MY")
	egen m`var'_2019 = max(`var'_2019)
	gen `var'_idx2019 = 100*(`var'/m`var'_2019)
	drop `var'_2019 m`var'_2019
}

label var year_month_gs "Year-Month"
label var spend_sg_idx2019 "SafeGraph Sales"
label var visits_sg_idx2019 "SafeGraph Visits"
label var emp_sg_idx2019 "SafeGraph Employees"
label var spend_limited_idx2019 "Census Sales"
label var emp_limited_idx2019 "BLS Employees"
label var prod_limited_idx2019 "Census/BLS Spend Productivity"
label var prod_sg_idx2019 "SafeGraph Spend Productivity"
label var visits_prod_sg_idx2019 "SafeGraph Visits Productivity"
label var prod_limited_food_cpi_idx2019 "Census/BLS Productivity (Food away from home CPI)"
label var prod_sg_food_cpi_idx2019 "SG Productivity (Food away from home CPI)"

********************************************************************************
*** 8. Save Dataset
********************************************************************************
save "${built_root}/aggregates_build.dta", replace
